Data Analysis
library(tidyverse)
library(readxl)
library(janitor)
library(data.table)
library(ggpubr)
library(rstatix)
library(viridis)
library(RColorBrewer)
library(lme4)
library(lmerTest)
Load Metadata
# Read in first metadata file
map <- read_excel("metadata/SEED LAB HMM Submission Sheet 202405.xlsx", skip = 9) %>%
clean_names()
# Replace column names with cleaned names
colnames(map) <- c("sampleid", "group", "bile_acids_panel", "pfbbr_scfa_panel",
"tryp_indole_panel", "tms_panel_1", "tms_panel_2", "tms_panel_3",
"untargeted_panel", "custom_targeted_panel",
"low_abundant_metabolite_panel", "sample_origin", "sample_type",
"mouse_number", "services_requested_per_sample", "mouse_strain",
"mouse_flora", "volume_ul", "sample_weight_mg", "experiment_treatment")
# Clean metadata
map <- map %>%
filter(!is.na(sampleid)) %>%
filter(sampleid != "") %>%
slice(-(1:3))
# Extract information from sample names
map <- map %>%
mutate(
# Extract subject ID (digits and letter before first space)
subject = str_extract(sampleid, "^[0-9]+[a-zA-Z]*"),
# Extract the part between spaces (e.g., "r1", "s2", "nc")
carb_repeat = str_extract(sampleid, "(?<= )[rs]\\d|nc(?= )"),
# Create carbohydrate_type column
carbohydrate_type = case_when(
str_detect(carb_repeat, "^r") ~ "rapid_digestible",
str_detect(carb_repeat, "^s") ~ "slow_digestible",
str_detect(carb_repeat, "^nc") ~ "no_carbohydrate",
TRUE ~ NA_character_
),
# Extract technical repeat number (handle "nc" case where there's no number)
technical_repeat = case_when(
str_detect(carb_repeat, "^nc") ~ 1L, # nc gets repeat 1
TRUE ~ as.integer(str_extract(carb_repeat, "\\d+"))
),
# Extract timepoint (after second space, before "h")
timepoint_hr = as.integer(str_extract(sampleid, "(?<= )\\d+(?=h$)"))
) %>%
# Remove the temporary carb_repeat column and select relevant columns
select(-carb_repeat) %>%
select(sampleid, group, subject, carbohydrate_type, technical_repeat, timepoint_hr)
# Load second metadata file
map2 <- read_excel("metadata/PS2720_xtra_samples_HMM Submission Sheet.xlsx", skip = 9) %>%
clean_names()
# Apply same processing as first file
colnames(map2) <- c("sampleid", "group", "bile_acids_panel", "pfbbr_scfa_panel",
"tryp_indole_panel", "tms_panel_1", "tms_panel_2", "tms_panel_3",
"untargeted_panel", "custom_targeted_panel",
"low_abundant_metabolite_panel", "sample_origin", "sample_type",
"mouse_number", "services_requested_per_sample", "mouse_strain",
"mouse_flora", "volume_ul", "sample_weight_mg", "experiment_treatment")
map2 <- map2 %>%
filter(!is.na(sampleid)) %>%
filter(sampleid != "") %>%
slice(-(1:3)) %>%
mutate(
# Extract subject ID (digits and letter before first space)
subject = str_extract(sampleid, "^[0-9]+[a-zA-Z]*"),
carb_repeat = str_extract(sampleid, "(?<= )[rs]\\d|nc(?= )"),
carbohydrate_type = case_when(
str_detect(carb_repeat, "^r") ~ "rapid_digestible",
str_detect(carb_repeat, "^s") ~ "slow_digestible",
str_detect(carb_repeat, "^nc") ~ "no_carbohydrate",
TRUE ~ NA_character_
),
technical_repeat = case_when(
str_detect(carb_repeat, "^nc") ~ 1L,
TRUE ~ as.integer(str_extract(carb_repeat, "\\d+"))
),
timepoint_hr = as.integer(str_extract(sampleid, "(?<= )\\d+(?=h$)"))
) %>%
select(-carb_repeat) %>%
select(sampleid, group, subject, carbohydrate_type, technical_repeat, timepoint_hr)
# Combine metadata files
map <- bind_rows(map, map2)
# Make group names lowercase for consistency
map <- map %>%
mutate(group = tolower(group))
Load SCFA Data
# Read in SCFA quantification data
data <- fread("data/removed_qcs_quant_results_20250722_PFBBr_PS2720_20250731.csv")
# Join metadata with SCFA data
sample_data <- left_join(map, data, by = "sampleid")
# Ensure group names are lowercase
sample_data <- sample_data %>%
mutate(group = tolower(group))
# Display data structure
cat("Dataset dimensions:", dim(sample_data), "\n")
## Dataset dimensions: 442 14
cat("Sample groups:", unique(sample_data$group), "\n")
## Sample groups: case control NA
cat("Carbohydrate types:", unique(sample_data$carbohydrate_type), "\n")
## Carbohydrate types: rapid_digestible slow_digestible no_carbohydrate NA
cat("Time points:", unique(sample_data$timepoint_hr), "\n")
## Time points: 0 48 NA
Summary Statistics
# Define SCFA analytes (excluding non-SCFA compounds)
scfa_analytes <- c("acetate", "butyrate", "propionate", "5aminovalerate", "succinate")
# Convert data to long format and average technical replicates
sample_data_long <- sample_data %>%
pivot_longer(cols = all_of(scfa_analytes),
names_to = "analyte",
values_to = "concentration") %>%
filter(!is.na(concentration)) %>%
# Average technical replicates FIRST before any statistical analysis
group_by(subject, group, carbohydrate_type, timepoint_hr, analyte) %>%
summarise(concentration = mean(concentration, na.rm = TRUE), .groups = "drop")
cat("Total observations after averaging technical replicates:", nrow(sample_data_long), "\n")
## Total observations after averaging technical replicates: 1010
Summary by Group
summary_by_group <- sample_data_long %>%
group_by(group, analyte) %>%
summarise(
n = n(),
mean = mean(concentration, na.rm = TRUE),
median = median(concentration, na.rm = TRUE),
sd = sd(concentration, na.rm = TRUE),
sem = sd / sqrt(n),
q25 = quantile(concentration, 0.25, na.rm = TRUE),
q75 = quantile(concentration, 0.75, na.rm = TRUE),
.groups = "drop"
) %>%
# Order so control and case are adjacent for each analyte
arrange(analyte, group)
knitr::kable(summary_by_group, digits = 3,
caption = "Summary Statistics by Experimental Group")
Summary Statistics by Experimental Group
| case |
5aminovalerate |
106 |
0.506 |
0.312 |
0.601 |
0.058 |
0.050 |
0.815 |
| control |
5aminovalerate |
96 |
0.549 |
0.415 |
0.539 |
0.055 |
0.050 |
1.070 |
| case |
acetate |
106 |
18.567 |
24.678 |
17.716 |
1.721 |
1.196 |
32.001 |
| control |
acetate |
96 |
18.722 |
21.415 |
17.680 |
1.804 |
1.182 |
33.516 |
| case |
butyrate |
106 |
3.754 |
3.100 |
3.844 |
0.373 |
0.295 |
7.152 |
| control |
butyrate |
96 |
4.205 |
3.350 |
4.408 |
0.450 |
0.270 |
7.136 |
| case |
propionate |
106 |
2.840 |
1.822 |
2.983 |
0.290 |
0.170 |
5.194 |
| control |
propionate |
96 |
2.736 |
1.982 |
2.862 |
0.292 |
0.159 |
5.366 |
| case |
succinate |
106 |
0.995 |
0.225 |
1.650 |
0.160 |
0.000 |
1.150 |
| control |
succinate |
96 |
1.254 |
0.270 |
1.885 |
0.192 |
0.000 |
1.646 |
Summary by Carbohydrate Type
summary_by_carb <- sample_data_long %>%
group_by(carbohydrate_type, analyte) %>%
summarise(
n = n(),
mean = mean(concentration, na.rm = TRUE),
median = median(concentration, na.rm = TRUE),
sd = sd(concentration, na.rm = TRUE),
sem = sd / sqrt(n),
q25 = quantile(concentration, 0.25, na.rm = TRUE),
q75 = quantile(concentration, 0.75, na.rm = TRUE),
.groups = "drop"
) %>%
# Order so the 3 carbohydrate types are in series with each analyte
arrange(analyte, carbohydrate_type)
knitr::kable(summary_by_carb, digits = 3,
caption = "Summary Statistics by Carbohydrate Type")
Summary Statistics by Carbohydrate Type
| no_carbohydrate |
5aminovalerate |
66 |
0.560 |
0.285 |
0.574 |
0.071 |
0.050 |
1.078 |
| rapid_digestible |
5aminovalerate |
68 |
0.499 |
0.295 |
0.562 |
0.068 |
0.050 |
0.805 |
| slow_digestible |
5aminovalerate |
68 |
0.521 |
0.340 |
0.586 |
0.071 |
0.050 |
0.830 |
| no_carbohydrate |
acetate |
66 |
15.881 |
18.985 |
14.820 |
1.824 |
1.210 |
29.773 |
| rapid_digestible |
acetate |
68 |
19.241 |
24.678 |
18.220 |
2.209 |
1.183 |
33.956 |
| slow_digestible |
acetate |
68 |
20.719 |
26.273 |
19.437 |
2.357 |
1.210 |
37.739 |
| no_carbohydrate |
butyrate |
66 |
3.252 |
2.830 |
3.044 |
0.375 |
0.312 |
6.425 |
| rapid_digestible |
butyrate |
68 |
4.059 |
2.650 |
4.550 |
0.552 |
0.269 |
7.510 |
| slow_digestible |
butyrate |
68 |
4.572 |
4.608 |
4.503 |
0.546 |
0.274 |
7.875 |
| no_carbohydrate |
propionate |
66 |
3.206 |
2.965 |
3.191 |
0.393 |
0.182 |
6.220 |
| rapid_digestible |
propionate |
68 |
2.321 |
1.895 |
2.416 |
0.293 |
0.164 |
3.840 |
| slow_digestible |
propionate |
68 |
2.858 |
1.868 |
3.076 |
0.373 |
0.168 |
5.366 |
| no_carbohydrate |
succinate |
66 |
0.661 |
0.240 |
1.035 |
0.127 |
0.000 |
0.720 |
| rapid_digestible |
succinate |
68 |
1.227 |
0.248 |
1.859 |
0.225 |
0.111 |
1.646 |
| slow_digestible |
succinate |
68 |
1.453 |
0.240 |
2.127 |
0.258 |
0.068 |
2.042 |
Summary by Time Point
summary_by_time <- sample_data_long %>%
group_by(timepoint_hr, analyte) %>%
summarise(
n = n(),
mean = mean(concentration, na.rm = TRUE),
median = median(concentration, na.rm = TRUE),
sd = sd(concentration, na.rm = TRUE),
sem = sd / sqrt(n),
q25 = quantile(concentration, 0.25, na.rm = TRUE),
q75 = quantile(concentration, 0.75, na.rm = TRUE),
.groups = "drop"
) %>%
# Order so that 0h and 48h are adjacent per analyte
arrange(analyte, timepoint_hr)
knitr::kable(summary_by_time, digits = 3,
caption = "Summary Statistics by Time Point")
Summary Statistics by Time Point
| 0 |
5aminovalerate |
102 |
0.113 |
0.050 |
0.204 |
0.020 |
0.050 |
0.075 |
| 48 |
5aminovalerate |
100 |
0.948 |
0.840 |
0.515 |
0.051 |
0.595 |
1.201 |
| 0 |
acetate |
102 |
3.650 |
1.197 |
10.307 |
1.021 |
1.004 |
1.399 |
| 48 |
acetate |
100 |
33.931 |
32.840 |
7.535 |
0.754 |
28.876 |
38.964 |
| 0 |
butyrate |
102 |
0.758 |
0.272 |
1.677 |
0.166 |
0.221 |
0.435 |
| 48 |
butyrate |
100 |
7.243 |
6.803 |
3.178 |
0.318 |
4.898 |
9.411 |
| 0 |
propionate |
102 |
0.526 |
0.167 |
1.413 |
0.140 |
0.121 |
0.230 |
| 48 |
propionate |
100 |
5.101 |
4.892 |
2.144 |
0.214 |
3.530 |
6.626 |
| 0 |
succinate |
102 |
0.407 |
0.148 |
1.235 |
0.122 |
0.000 |
0.250 |
| 48 |
succinate |
100 |
1.843 |
1.505 |
1.929 |
0.193 |
0.000 |
3.133 |
Combined Summary Statistics
summary_combined <- sample_data_long %>%
group_by(group, carbohydrate_type, timepoint_hr, analyte) %>%
summarise(
n = n(),
mean = mean(concentration, na.rm = TRUE),
median = median(concentration, na.rm = TRUE),
sd = sd(concentration, na.rm = TRUE),
sem = sd / sqrt(n),
q25 = quantile(concentration, 0.25, na.rm = TRUE),
q75 = quantile(concentration, 0.75, na.rm = TRUE),
.groups = "drop"
)
# Create results directory and save combined summary
dir.create("results", showWarnings = FALSE)
write.csv(summary_combined, "results/combined_summary_statistics.csv", row.names = FALSE)
cat("Combined summary table contains", nrow(summary_combined), "condition combinations\n")
## Combined summary table contains 60 condition combinations
cat("Combined summary saved to results/combined_summary_statistics.csv\n")
## Combined summary saved to results/combined_summary_statistics.csv
Statistical Analysis
# Prepare data with proper factor levels for statistical testing
sample_data_long <- sample_data_long %>%
mutate(
group = factor(group, levels = c("control", "case")),
carbohydrate_type = factor(carbohydrate_type,
levels = c("no_carbohydrate", "rapid_digestible", "slow_digestible")),
timepoint_hr = factor(timepoint_hr),
subject = factor(subject) # Add subject as factor for random effects
)
# Statistical tests by group (t-tests)
group_stats <- sample_data_long %>%
group_by(analyte) %>%
t_test(concentration ~ group) %>%
adjust_pvalue(method = "BH") %>%
add_significance()
# Statistical tests by carbohydrate type (ANOVA with post-hoc)
carb_stats <- sample_data_long %>%
group_by(analyte) %>%
anova_test(concentration ~ carbohydrate_type) %>%
adjust_pvalue(method = "BH") %>%
add_significance()
# Post-hoc comparisons for carbohydrate type (vs no_carbohydrate)
carb_posthoc <- sample_data_long %>%
group_by(analyte) %>%
pairwise_t_test(concentration ~ carbohydrate_type,
ref.group = "no_carbohydrate") %>%
adjust_pvalue(method = "BH") %>%
add_significance()
# Three-way ANOVA: Group x Carbohydrate x Time interaction
interaction_stats <- sample_data_long %>%
group_by(analyte) %>%
anova_test(concentration ~ group * carbohydrate_type * timepoint_hr) %>%
adjust_pvalue(method = "BH") %>%
add_significance()
# Mixed-effects models accounting for subject as random effect
mixed_effects_results <- list()
for (analyte_name in scfa_analytes) {
analyte_data <- sample_data_long %>% filter(analyte == analyte_name)
# Full model with all interactions and subject as random intercept
full_model <- lmer(concentration ~ group * carbohydrate_type * timepoint_hr +
(1 | subject),
data = analyte_data,
REML = FALSE)
# Model without three-way interaction
no_three_way <- lmer(concentration ~ group + carbohydrate_type + timepoint_hr +
group:carbohydrate_type + group:timepoint_hr +
carbohydrate_type:timepoint_hr + (1 | subject),
data = analyte_data,
REML = FALSE)
# Model with only main effects
main_effects <- lmer(concentration ~ group + carbohydrate_type + timepoint_hr +
(1 | subject),
data = analyte_data,
REML = FALSE)
# Store results
mixed_effects_results[[analyte_name]] <- list(
full_model = full_model,
no_three_way = no_three_way,
main_effects = main_effects,
anova_full = anova(full_model),
summary_full = summary(full_model)
)
}
# Subject-level summary statistics
subject_summary <- sample_data_long %>%
group_by(subject, group, analyte) %>%
summarise(
mean_conc = mean(concentration, na.rm = TRUE),
n_observations = n(),
.groups = "drop"
) %>%
group_by(group, analyte) %>%
summarise(
n_subjects = n(),
mean_subject_means = mean(mean_conc, na.rm = TRUE),
sd_subject_means = sd(mean_conc, na.rm = TRUE),
sem_subjects = sd_subject_means / sqrt(n_subjects),
.groups = "drop"
)
# Within-subject changes (baseline to 48h)
within_subject_changes <- sample_data_long %>%
filter(timepoint_hr %in% c("0", "48")) %>%
mutate(timepoint_hr = as.character(timepoint_hr)) %>% # Convert back to character for pivot
select(subject, group, carbohydrate_type, timepoint_hr, analyte, concentration) %>%
pivot_wider(names_from = timepoint_hr,
values_from = concentration,
names_prefix = "time_") %>%
mutate(change_0_to_48 = time_48 - time_0) %>%
filter(!is.na(change_0_to_48))
# Statistical tests for within-subject changes
within_subject_stats <- within_subject_changes %>%
group_by(group, carbohydrate_type, analyte) %>%
summarise(
n = n(),
mean_change = mean(change_0_to_48, na.rm = TRUE),
sd_change = sd(change_0_to_48, na.rm = TRUE),
sem_change = sd_change / sqrt(n),
t_stat = mean_change / sem_change,
p_value = 2 * pt(-abs(t_stat), df = n - 1), # two-tailed t-test
.groups = "drop"
) %>%
group_by(analyte) %>%
mutate(p_adj = p.adjust(p_value, method = "BH")) %>%
mutate(significance = case_when(
p_adj < 0.001 ~ "***",
p_adj < 0.01 ~ "**",
p_adj < 0.05 ~ "*",
TRUE ~ "ns"
))
# Case-only temporal analysis (0h vs 48h)
case_only_changes <- sample_data_long %>%
filter(group == "case", timepoint_hr %in% c("0", "48")) %>%
mutate(timepoint_hr = as.character(timepoint_hr)) %>%
pivot_wider(names_from = timepoint_hr,
values_from = concentration,
names_prefix = "time_") %>%
mutate(change_0_to_48 = time_48 - time_0) %>%
filter(!is.na(change_0_to_48))
# Statistical tests for case-only temporal changes
case_only_stats <- case_only_changes %>%
group_by(carbohydrate_type, analyte) %>%
summarise(
n = n(),
mean_change = mean(change_0_to_48, na.rm = TRUE),
sd_change = sd(change_0_to_48, na.rm = TRUE),
sem_change = sd_change / sqrt(n),
t_stat = mean_change / sem_change,
p_value = 2 * pt(-abs(t_stat), df = n - 1), # two-tailed t-test
.groups = "drop"
) %>%
group_by(analyte) %>%
mutate(p_adj = p.adjust(p_value, method = "BH")) %>%
mutate(significance = case_when(
p_adj < 0.001 ~ "***",
p_adj < 0.01 ~ "**",
p_adj < 0.05 ~ "*",
TRUE ~ "ns"
))
# Paired t-tests for case-only changes by analyte (pooled across carb types)
case_only_pooled_stats <- case_only_changes %>%
group_by(analyte) %>%
summarise(
n = n(),
mean_change = mean(change_0_to_48, na.rm = TRUE),
sd_change = sd(change_0_to_48, na.rm = TRUE),
sem_change = sd_change / sqrt(n),
t_stat = mean_change / sem_change,
p_value = 2 * pt(-abs(t_stat), df = n - 1),
.groups = "drop"
) %>%
mutate(p_adj = p.adjust(p_value, method = "BH")) %>%
mutate(significance = case_when(
p_adj < 0.001 ~ "***",
p_adj < 0.01 ~ "**",
p_adj < 0.05 ~ "*",
TRUE ~ "ns"
))
# Mixed-effects models for case-only temporal analysis
case_only_mixed_effects <- list()
# Filter data to case group only
case_only_data <- sample_data_long %>%
filter(group == "case") %>%
mutate(
timepoint_hr = factor(timepoint_hr),
subject = factor(subject),
carbohydrate_type = factor(carbohydrate_type,
levels = c("no_carbohydrate", "rapid_digestible", "slow_digestible"))
)
for (analyte_name in scfa_analytes) {
case_analyte_data <- case_only_data %>% filter(analyte == analyte_name)
# Model with time and carbohydrate effects, subject as random intercept
time_carb_model <- lmer(concentration ~ timepoint_hr * carbohydrate_type +
(1 | subject),
data = case_analyte_data,
REML = FALSE)
# Model with time effect only, subject as random intercept
time_only_model <- lmer(concentration ~ timepoint_hr + (1 | subject),
data = case_analyte_data,
REML = FALSE)
# Model with carbohydrate effect only, subject as random intercept
carb_only_model <- lmer(concentration ~ carbohydrate_type + (1 | subject),
data = case_analyte_data,
REML = FALSE)
# Null model (intercept + random effect only)
null_model <- lmer(concentration ~ 1 + (1 | subject),
data = case_analyte_data,
REML = FALSE)
# Store results
case_only_mixed_effects[[analyte_name]] <- list(
time_carb_model = time_carb_model,
time_only_model = time_only_model,
carb_only_model = carb_only_model,
null_model = null_model,
anova_time_carb = anova(time_carb_model),
summary_time_carb = summary(time_carb_model),
# Model comparisons
time_vs_null = anova(null_model, time_only_model),
carb_vs_null = anova(null_model, carb_only_model),
interaction_vs_main = anova(time_only_model, time_carb_model)
)
}
# Save subject-level analyses to CSV files
write.csv(subject_summary, "results/subject_level_summary.csv", row.names = FALSE)
write.csv(within_subject_stats, "results/within_subject_changes.csv", row.names = FALSE)
write.csv(case_only_stats, "results/case_only_temporal_changes.csv", row.names = FALSE)
write.csv(case_only_pooled_stats, "results/case_only_pooled_temporal_changes.csv", row.names = FALSE)
cat("Subject-level and case-only analyses saved to results/ directory\n")
## Subject-level and case-only analyses saved to results/ directory
Statistical Results
Group Comparisons (Control vs Case)
knitr::kable(group_stats, digits = 4,
caption = "Group Comparisons with Benjamini-Hochberg Correction")
Group Comparisons with Benjamini-Hochberg Correction
| 5aminovalerate |
concentration |
control |
case |
96 |
106 |
0.5327 |
199.9821 |
0.595 |
0.95 |
ns |
| acetate |
concentration |
control |
case |
96 |
106 |
0.0625 |
198.1122 |
0.950 |
0.95 |
ns |
| butyrate |
concentration |
control |
case |
96 |
106 |
0.7704 |
189.5584 |
0.442 |
0.95 |
ns |
| propionate |
concentration |
control |
case |
96 |
106 |
-0.2541 |
199.3221 |
0.800 |
0.95 |
ns |
| succinate |
concentration |
control |
case |
96 |
106 |
1.0312 |
189.8692 |
0.304 |
0.95 |
ns |
Carbohydrate Type Comparisons
knitr::kable(carb_stats, digits = 4,
caption = "ANOVA Results for Carbohydrate Type Effects")
ANOVA Results for Carbohydrate Type Effects
| 5aminovalerate |
carbohydrate_type |
2 |
199 |
0.195 |
0.823 |
|
0.002 |
0.8230 |
ns |
| acetate |
carbohydrate_type |
2 |
199 |
1.321 |
0.269 |
|
0.013 |
0.3363 |
ns |
| butyrate |
carbohydrate_type |
2 |
199 |
1.760 |
0.175 |
|
0.017 |
0.3363 |
ns |
| propionate |
carbohydrate_type |
2 |
199 |
1.575 |
0.210 |
|
0.016 |
0.3363 |
ns |
| succinate |
carbohydrate_type |
2 |
199 |
3.657 |
0.028 |
* |
0.035 |
0.1400 |
ns |
Post-hoc Carbohydrate Comparisons
knitr::kable(carb_posthoc, digits = 4,
caption = "Pairwise Comparisons vs No Carbohydrate Control")
Pairwise Comparisons vs No Carbohydrate Control
| 5aminovalerate |
concentration |
no_carbohydrate |
rapid_digestible |
66 |
68 |
0.5370 |
ns |
0.5967 |
ns |
| 5aminovalerate |
concentration |
no_carbohydrate |
slow_digestible |
66 |
68 |
0.6990 |
ns |
0.6990 |
ns |
| acetate |
concentration |
no_carbohydrate |
rapid_digestible |
66 |
68 |
0.2710 |
ns |
0.3871 |
ns |
| acetate |
concentration |
no_carbohydrate |
slow_digestible |
66 |
68 |
0.1140 |
ns |
0.2280 |
ns |
| butyrate |
concentration |
no_carbohydrate |
rapid_digestible |
66 |
68 |
0.2560 |
ns |
0.3871 |
ns |
| butyrate |
concentration |
no_carbohydrate |
slow_digestible |
66 |
68 |
0.0640 |
ns |
0.2003 |
ns |
| propionate |
concentration |
no_carbohydrate |
rapid_digestible |
66 |
68 |
0.0801 |
ns |
0.2003 |
ns |
| propionate |
concentration |
no_carbohydrate |
slow_digestible |
66 |
68 |
0.4900 |
ns |
0.5967 |
ns |
| succinate |
concentration |
no_carbohydrate |
rapid_digestible |
66 |
68 |
0.0617 |
ns |
0.2003 |
ns |
| succinate |
concentration |
no_carbohydrate |
slow_digestible |
66 |
68 |
0.0092 |
** |
0.0921 |
ns |
Three-way Interaction Analysis
# Check the structure and convert to data frame for proper column selection
interaction_display <- as.data.frame(interaction_stats) %>%
select(analyte, Effect, p, p.adj, p.adj.signif)
knitr::kable(interaction_display,
digits = 4,
caption = "Three-way ANOVA: Group × Carbohydrate × Time Interactions")
Three-way ANOVA: Group × Carbohydrate × Time Interactions
| 5aminovalerate |
group |
0.550 |
0.8108 |
ns |
| 5aminovalerate |
carbohydrate_type |
0.556 |
0.8108 |
ns |
| 5aminovalerate |
timepoint_hr |
0.000 |
0.0000 |
**** |
| 5aminovalerate |
group:carbohydrate_type |
0.900 |
0.9810 |
ns |
| 5aminovalerate |
group:timepoint_hr |
0.486 |
0.8082 |
ns |
| 5aminovalerate |
carbohydrate_type:timepoint_hr |
0.683 |
0.9562 |
ns |
| 5aminovalerate |
group:carbohydrate_type:timepoint_hr |
0.895 |
0.9810 |
ns |
| acetate |
group |
0.966 |
0.9810 |
ns |
| acetate |
carbohydrate_type |
0.018 |
0.0573 |
ns |
| acetate |
timepoint_hr |
0.000 |
0.0000 |
**** |
| acetate |
group:carbohydrate_type |
0.797 |
0.9810 |
ns |
| acetate |
group:timepoint_hr |
0.799 |
0.9810 |
ns |
| acetate |
carbohydrate_type:timepoint_hr |
0.162 |
0.4725 |
ns |
| acetate |
group:carbohydrate_type:timepoint_hr |
0.844 |
0.9810 |
ns |
| butyrate |
group |
0.232 |
0.6090 |
ns |
| butyrate |
carbohydrate_type |
0.016 |
0.0573 |
ns |
| butyrate |
timepoint_hr |
0.000 |
0.0000 |
**** |
| butyrate |
group:carbohydrate_type |
0.473 |
0.8082 |
ns |
| butyrate |
group:timepoint_hr |
0.311 |
0.6803 |
ns |
| butyrate |
carbohydrate_type:timepoint_hr |
0.012 |
0.0525 |
ns |
| butyrate |
group:carbohydrate_type:timepoint_hr |
0.467 |
0.8082 |
ns |
| propionate |
group |
0.508 |
0.8082 |
ns |
| propionate |
carbohydrate_type |
0.008 |
0.0450 |
* |
| propionate |
timepoint_hr |
0.000 |
0.0000 |
**** |
| propionate |
group:carbohydrate_type |
0.979 |
0.9810 |
ns |
| propionate |
group:timepoint_hr |
0.485 |
0.8082 |
ns |
| propionate |
carbohydrate_type:timepoint_hr |
0.009 |
0.0450 |
* |
| propionate |
group:carbohydrate_type:timepoint_hr |
0.981 |
0.9810 |
ns |
| succinate |
group |
0.257 |
0.6090 |
ns |
| succinate |
carbohydrate_type |
0.018 |
0.0573 |
ns |
| succinate |
timepoint_hr |
0.000 |
0.0000 |
**** |
| succinate |
group:carbohydrate_type |
0.936 |
0.9810 |
ns |
| succinate |
group:timepoint_hr |
0.433 |
0.8082 |
ns |
| succinate |
carbohydrate_type:timepoint_hr |
0.261 |
0.6090 |
ns |
| succinate |
group:carbohydrate_type:timepoint_hr |
0.934 |
0.9810 |
ns |
Subject-Level Analyses
Subject-Level Summary Statistics
knitr::kable(subject_summary, digits = 3,
caption = "Subject-Level Summary Statistics")
Subject-Level Summary Statistics
| control |
5aminovalerate |
16 |
0.549 |
0.156 |
0.039 |
| control |
acetate |
16 |
18.722 |
6.602 |
1.651 |
| control |
butyrate |
16 |
4.205 |
1.642 |
0.410 |
| control |
propionate |
16 |
2.736 |
1.157 |
0.289 |
| control |
succinate |
16 |
1.254 |
1.206 |
0.302 |
| case |
5aminovalerate |
18 |
0.500 |
0.305 |
0.072 |
| case |
acetate |
18 |
18.475 |
6.366 |
1.500 |
| case |
butyrate |
18 |
3.764 |
1.429 |
0.337 |
| case |
propionate |
18 |
2.833 |
1.160 |
0.273 |
| case |
succinate |
18 |
1.013 |
1.079 |
0.254 |
Within-Subject Changes (0h to 48h)
knitr::kable(within_subject_stats, digits = 4,
caption = "Within-Subject Changes from Baseline to 48h")
Within-Subject Changes from Baseline to 48h
| control |
no_carbohydrate |
5aminovalerate |
16 |
0.9675 |
0.5707 |
0.1427 |
6.7813 |
0.0000 |
0.0000 |
*** |
| control |
no_carbohydrate |
acetate |
16 |
26.0637 |
9.8419 |
2.4605 |
10.5930 |
0.0000 |
0.0000 |
*** |
| control |
no_carbohydrate |
butyrate |
16 |
4.8569 |
2.2522 |
0.5631 |
8.6259 |
0.0000 |
0.0000 |
*** |
| control |
no_carbohydrate |
propionate |
16 |
5.2625 |
2.3009 |
0.5752 |
9.1486 |
0.0000 |
0.0000 |
*** |
| control |
no_carbohydrate |
succinate |
16 |
1.2794 |
1.2816 |
0.3204 |
3.9931 |
0.0012 |
0.0035 |
** |
| control |
rapid_digestible |
5aminovalerate |
16 |
0.7809 |
0.3272 |
0.0818 |
9.5458 |
0.0000 |
0.0000 |
*** |
| control |
rapid_digestible |
acetate |
16 |
31.2150 |
12.5947 |
3.1487 |
9.9137 |
0.0000 |
0.0000 |
*** |
| control |
rapid_digestible |
butyrate |
16 |
7.2094 |
4.5932 |
1.1483 |
6.2783 |
0.0000 |
0.0000 |
*** |
| control |
rapid_digestible |
propionate |
16 |
3.3881 |
2.5521 |
0.6380 |
5.3104 |
0.0001 |
0.0001 |
*** |
| control |
rapid_digestible |
succinate |
16 |
1.5428 |
2.2543 |
0.5636 |
2.7376 |
0.0153 |
0.0183 |
* |
| control |
slow_digestible |
5aminovalerate |
16 |
0.8838 |
0.4453 |
0.1113 |
7.9380 |
0.0000 |
0.0000 |
*** |
| control |
slow_digestible |
acetate |
16 |
32.3325 |
11.6928 |
2.9232 |
11.0607 |
0.0000 |
0.0000 |
*** |
| control |
slow_digestible |
butyrate |
16 |
8.4188 |
3.9175 |
0.9794 |
8.5960 |
0.0000 |
0.0000 |
*** |
| control |
slow_digestible |
propionate |
16 |
4.5875 |
2.6021 |
0.6505 |
7.0520 |
0.0000 |
0.0000 |
*** |
| control |
slow_digestible |
succinate |
16 |
1.9950 |
2.3006 |
0.5752 |
3.4686 |
0.0034 |
0.0069 |
** |
| case |
no_carbohydrate |
5aminovalerate |
16 |
0.8238 |
0.5403 |
0.1351 |
6.0984 |
0.0000 |
0.0000 |
*** |
| case |
no_carbohydrate |
acetate |
16 |
27.9000 |
9.3007 |
2.3252 |
11.9991 |
0.0000 |
0.0000 |
*** |
| case |
no_carbohydrate |
butyrate |
16 |
5.3244 |
1.9566 |
0.4891 |
10.8850 |
0.0000 |
0.0000 |
*** |
| case |
no_carbohydrate |
propionate |
16 |
5.6719 |
2.4026 |
0.6007 |
9.4429 |
0.0000 |
0.0000 |
*** |
| case |
no_carbohydrate |
succinate |
16 |
0.6794 |
1.2179 |
0.3045 |
2.2314 |
0.0413 |
0.0413 |
* |
| case |
rapid_digestible |
5aminovalerate |
18 |
0.7725 |
0.6661 |
0.1570 |
4.9206 |
0.0001 |
0.0001 |
*** |
| case |
rapid_digestible |
acetate |
18 |
29.8242 |
12.0232 |
2.8339 |
10.5241 |
0.0000 |
0.0000 |
*** |
| case |
rapid_digestible |
butyrate |
18 |
6.0833 |
3.9148 |
0.9227 |
6.5928 |
0.0000 |
0.0000 |
*** |
| case |
rapid_digestible |
propionate |
18 |
3.7908 |
2.1631 |
0.5098 |
7.4353 |
0.0000 |
0.0000 |
*** |
| case |
rapid_digestible |
succinate |
18 |
1.2806 |
1.8354 |
0.4326 |
2.9601 |
0.0088 |
0.0132 |
* |
| case |
slow_digestible |
5aminovalerate |
18 |
0.7925 |
0.6820 |
0.1607 |
4.9302 |
0.0001 |
0.0001 |
*** |
| case |
slow_digestible |
acetate |
18 |
33.6878 |
11.0264 |
2.5989 |
12.9621 |
0.0000 |
0.0000 |
*** |
| case |
slow_digestible |
butyrate |
18 |
6.9603 |
2.9434 |
0.6938 |
10.0326 |
0.0000 |
0.0000 |
*** |
| case |
slow_digestible |
propionate |
18 |
4.7992 |
2.8815 |
0.6792 |
7.0662 |
0.0000 |
0.0000 |
*** |
| case |
slow_digestible |
succinate |
18 |
1.7786 |
1.8191 |
0.4288 |
4.1482 |
0.0007 |
0.0035 |
** |
Case-Only Temporal Analysis
Case Group Temporal Changes by Carbohydrate Type
knitr::kable(case_only_stats, digits = 4,
caption = "Case Group Only: Temporal Changes (0h to 48h) by Carbohydrate Type")
Case Group Only: Temporal Changes (0h to 48h) by Carbohydrate Type
| no_carbohydrate |
5aminovalerate |
16 |
0.8238 |
0.5403 |
0.1351 |
6.0984 |
0.0000 |
0.0001 |
*** |
| no_carbohydrate |
acetate |
16 |
27.9000 |
9.3007 |
2.3252 |
11.9991 |
0.0000 |
0.0000 |
*** |
| no_carbohydrate |
butyrate |
16 |
5.3244 |
1.9566 |
0.4891 |
10.8850 |
0.0000 |
0.0000 |
*** |
| no_carbohydrate |
propionate |
16 |
5.6719 |
2.4026 |
0.6007 |
9.4429 |
0.0000 |
0.0000 |
*** |
| no_carbohydrate |
succinate |
16 |
0.6794 |
1.2179 |
0.3045 |
2.2314 |
0.0413 |
0.0413 |
* |
| rapid_digestible |
5aminovalerate |
18 |
0.7725 |
0.6661 |
0.1570 |
4.9206 |
0.0001 |
0.0001 |
*** |
| rapid_digestible |
acetate |
18 |
29.8242 |
12.0232 |
2.8339 |
10.5241 |
0.0000 |
0.0000 |
*** |
| rapid_digestible |
butyrate |
18 |
6.0833 |
3.9148 |
0.9227 |
6.5928 |
0.0000 |
0.0000 |
*** |
| rapid_digestible |
propionate |
18 |
3.7908 |
2.1631 |
0.5098 |
7.4353 |
0.0000 |
0.0000 |
*** |
| rapid_digestible |
succinate |
18 |
1.2806 |
1.8354 |
0.4326 |
2.9601 |
0.0088 |
0.0132 |
* |
| slow_digestible |
5aminovalerate |
18 |
0.7925 |
0.6820 |
0.1607 |
4.9302 |
0.0001 |
0.0001 |
*** |
| slow_digestible |
acetate |
18 |
33.6878 |
11.0264 |
2.5989 |
12.9621 |
0.0000 |
0.0000 |
*** |
| slow_digestible |
butyrate |
18 |
6.9603 |
2.9434 |
0.6938 |
10.0326 |
0.0000 |
0.0000 |
*** |
| slow_digestible |
propionate |
18 |
4.7992 |
2.8815 |
0.6792 |
7.0662 |
0.0000 |
0.0000 |
*** |
| slow_digestible |
succinate |
18 |
1.7786 |
1.8191 |
0.4288 |
4.1482 |
0.0007 |
0.0020 |
** |
Case Group Temporal Changes (Pooled)
knitr::kable(case_only_pooled_stats, digits = 4,
caption = "Case Group Only: Temporal Changes (0h to 48h) Pooled Across Carbohydrate Types")
Case Group Only: Temporal Changes (0h to 48h) Pooled Across Carbohydrate Types
| 5aminovalerate |
52 |
0.7952 |
0.6239 |
0.0865 |
9.1914 |
0 |
0 |
*** |
| acetate |
52 |
30.5695 |
10.9553 |
1.5192 |
20.1218 |
0 |
0 |
*** |
| butyrate |
52 |
6.1534 |
3.0935 |
0.4290 |
14.3440 |
0 |
0 |
*** |
| propionate |
52 |
4.7187 |
2.5722 |
0.3567 |
13.2286 |
0 |
0 |
*** |
| succinate |
52 |
1.2680 |
1.6920 |
0.2346 |
5.4039 |
0 |
0 |
*** |
Case Group Mixed-Effects Models (Subject Random Effects)
# Display key results from case-only mixed-effects models
case_only_mixed_summary <- data.frame(
analyte = character(),
effect = character(),
f_value = numeric(),
p_value = numeric(),
stringsAsFactors = FALSE
)
for (analyte_name in names(case_only_mixed_effects)) {
anova_results <- case_only_mixed_effects[[analyte_name]]$anova_time_carb
for (i in 1:nrow(anova_results)) {
case_only_mixed_summary <- rbind(case_only_mixed_summary, data.frame(
analyte = analyte_name,
effect = rownames(anova_results)[i],
f_value = anova_results[i, "F value"],
p_value = anova_results[i, "Pr(>F)"],
stringsAsFactors = FALSE
))
}
}
# Add significance indicators
case_only_mixed_summary <- case_only_mixed_summary %>%
mutate(significance = case_when(
p_value < 0.001 ~ "***",
p_value < 0.01 ~ "**",
p_value < 0.05 ~ "*",
TRUE ~ "ns"
))
knitr::kable(case_only_mixed_summary, digits = 4,
caption = "Case Group Mixed-Effects Models: Time × Carbohydrate Effects with Subject Random Effects")
Case Group Mixed-Effects Models: Time × Carbohydrate Effects with Subject Random Effects
| acetate |
timepoint_hr |
577.7185 |
0.0000 |
*** |
| acetate |
carbohydrate_type |
3.7907 |
0.0263 |
* |
| acetate |
timepoint_hr:carbohydrate_type |
1.8513 |
0.1631 |
ns |
| butyrate |
timepoint_hr |
285.4882 |
0.0000 |
*** |
| butyrate |
carbohydrate_type |
1.3675 |
0.2601 |
ns |
| butyrate |
timepoint_hr:carbohydrate_type |
1.4359 |
0.2434 |
ns |
| propionate |
timepoint_hr |
298.4659 |
0.0000 |
*** |
| propionate |
carbohydrate_type |
4.4652 |
0.0142 |
* |
| propionate |
timepoint_hr:carbohydrate_type |
3.9977 |
0.0218 |
* |
| 5aminovalerate |
timepoint_hr |
118.4989 |
0.0000 |
*** |
| 5aminovalerate |
carbohydrate_type |
0.0794 |
0.9238 |
ns |
| 5aminovalerate |
timepoint_hr:carbohydrate_type |
0.0227 |
0.9776 |
ns |
| succinate |
timepoint_hr |
35.2564 |
0.0000 |
*** |
| succinate |
carbohydrate_type |
5.1620 |
0.0076 |
** |
| succinate |
timepoint_hr:carbohydrate_type |
1.7877 |
0.1734 |
ns |
Mixed-Effects Model Results
Mixed-Effects Model Summary
# Display key results from mixed-effects models
mixed_effects_summary <- data.frame(
analyte = character(),
effect = character(),
f_value = numeric(),
p_value = numeric(),
stringsAsFactors = FALSE
)
for (analyte_name in names(mixed_effects_results)) {
anova_results <- mixed_effects_results[[analyte_name]]$anova_full
for (i in 1:nrow(anova_results)) {
mixed_effects_summary <- rbind(mixed_effects_summary, data.frame(
analyte = analyte_name,
effect = rownames(anova_results)[i],
f_value = anova_results[i, "F value"],
p_value = anova_results[i, "Pr(>F)"],
stringsAsFactors = FALSE
))
}
}
# Add significance indicators
mixed_effects_summary <- mixed_effects_summary %>%
mutate(significance = case_when(
p_value < 0.001 ~ "***",
p_value < 0.01 ~ "**",
p_value < 0.05 ~ "*",
TRUE ~ "ns"
))
knitr::kable(mixed_effects_summary, digits = 4,
caption = "Mixed-Effects Models: F-values and P-values for Fixed Effects")
Mixed-Effects Models: F-values and P-values for Fixed Effects
| acetate |
group |
0.0020 |
0.9640 |
ns |
| acetate |
carbohydrate_type |
8.0168 |
0.0005 |
*** |
| acetate |
timepoint_hr |
1052.3296 |
0.0000 |
*** |
| acetate |
group:carbohydrate_type |
0.3870 |
0.6797 |
ns |
| acetate |
group:timepoint_hr |
0.0933 |
0.7604 |
ns |
| acetate |
carbohydrate_type:timepoint_hr |
3.5739 |
0.0301 |
* |
| acetate |
group:carbohydrate_type:timepoint_hr |
0.2820 |
0.7546 |
ns |
| butyrate |
group |
0.4723 |
0.4943 |
ns |
| butyrate |
carbohydrate_type |
6.4670 |
0.0020 |
** |
| butyrate |
timepoint_hr |
526.0814 |
0.0000 |
*** |
| butyrate |
group:carbohydrate_type |
1.2604 |
0.2861 |
ns |
| butyrate |
group:timepoint_hr |
1.3852 |
0.2408 |
ns |
| butyrate |
carbohydrate_type:timepoint_hr |
6.7286 |
0.0015 |
** |
| butyrate |
group:carbohydrate_type:timepoint_hr |
1.2364 |
0.2930 |
ns |
| propionate |
group |
0.1805 |
0.6722 |
ns |
| propionate |
carbohydrate_type |
8.0491 |
0.0005 |
*** |
| propionate |
timepoint_hr |
534.7998 |
0.0000 |
*** |
| propionate |
group:carbohydrate_type |
0.0371 |
0.9636 |
ns |
| propionate |
group:timepoint_hr |
0.7900 |
0.3753 |
ns |
| propionate |
carbohydrate_type:timepoint_hr |
7.6415 |
0.0007 |
*** |
| propionate |
group:carbohydrate_type:timepoint_hr |
0.0326 |
0.9680 |
ns |
| 5aminovalerate |
group |
0.1670 |
0.6842 |
ns |
| 5aminovalerate |
carbohydrate_type |
0.7584 |
0.4700 |
ns |
| 5aminovalerate |
timepoint_hr |
315.2684 |
0.0000 |
*** |
| 5aminovalerate |
group:carbohydrate_type |
0.1987 |
0.8200 |
ns |
| 5aminovalerate |
group:timepoint_hr |
0.8167 |
0.3674 |
ns |
| 5aminovalerate |
carbohydrate_type:timepoint_hr |
0.4816 |
0.6187 |
ns |
| 5aminovalerate |
group:carbohydrate_type:timepoint_hr |
0.2035 |
0.8161 |
ns |
| succinate |
group |
0.3143 |
0.5766 |
ns |
| succinate |
carbohydrate_type |
7.1550 |
0.0010 |
** |
| succinate |
timepoint_hr |
73.6895 |
0.0000 |
*** |
| succinate |
group:carbohydrate_type |
0.0733 |
0.9293 |
ns |
| succinate |
group:timepoint_hr |
0.9640 |
0.3275 |
ns |
| succinate |
carbohydrate_type:timepoint_hr |
2.1985 |
0.1141 |
ns |
| succinate |
group:carbohydrate_type:timepoint_hr |
0.0731 |
0.9295 |
ns |
Visualizations
SCFA Concentrations by Group and Carbohydrate Type
plot_by_group <- sample_data_long %>%
ggplot(aes(x = group, y = concentration, fill = group)) +
geom_boxplot(alpha = 0.8) +
facet_grid(carbohydrate_type ~ analyte, scales = "free_y") +
scale_fill_manual(values = group_colors) +
stat_compare_means(method = "t.test",
comparisons = list(c("control", "case")),
label = "p.signif",
step_increase = 0.1) +
labs(title = "SCFA Concentrations by Group and Carbohydrate Type",
x = "Group",
y = "Concentration (μM)",
fill = "Group") +
pub_theme +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
plot_by_group

SCFA Concentrations by Carbohydrate Type and Time Point
plot_by_carb <- sample_data_long %>%
ggplot(aes(x = carbohydrate_type, y = concentration, fill = carbohydrate_type)) +
geom_boxplot(alpha = 0.8) +
facet_grid(timepoint_hr ~ analyte, scales = "free_y",
labeller = labeller(timepoint_hr = function(x) paste0(x, "h"))) +
scale_fill_manual(values = carb_colors) +
stat_compare_means(method = "t.test",
ref.group = "no_carbohydrate",
label = "p.signif",
step_increase = 0.1) +
labs(title = "SCFA Concentrations by Carbohydrate Type and Time Point",
x = "Carbohydrate Type",
y = "Concentration (μM)",
fill = "Carbohydrate Type") +
pub_theme +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "bottom")
plot_by_carb

Time Series Analysis
plot_time_series <- sample_data_long %>%
mutate(timepoint_hr = as.numeric(as.character(timepoint_hr))) %>%
group_by(carbohydrate_type, timepoint_hr, analyte) %>%
summarise(
mean_conc = mean(concentration, na.rm = TRUE),
sem = sd(concentration, na.rm = TRUE) / sqrt(n()),
.groups = "drop"
) %>%
ggplot(aes(x = timepoint_hr, y = mean_conc, color = carbohydrate_type)) +
geom_line(linewidth = 1.2) +
geom_point(size = 3) +
geom_errorbar(aes(ymin = mean_conc - sem, ymax = mean_conc + sem),
width = 1, linewidth = 0.8) +
facet_wrap(~ analyte, scales = "free_y") +
scale_color_manual(values = carb_colors) +
scale_x_continuous(breaks = c(0, 48)) +
labs(title = "SCFA Concentrations Over Time by Carbohydrate Type",
x = "Time (hours)",
y = "Mean Concentration ± SEM (μM)",
color = "Carbohydrate Type") +
pub_theme +
theme(legend.position = "bottom")
plot_time_series

Concentration Heatmap
heatmap_data <- sample_data_long %>%
mutate(timepoint_hr = paste0(timepoint_hr, "h")) %>%
group_by(group, carbohydrate_type, timepoint_hr, analyte) %>%
summarise(mean_conc = mean(concentration, na.rm = TRUE), .groups = "drop") %>%
unite("condition", group, carbohydrate_type, timepoint_hr, sep = "_")
plot_heatmap <- heatmap_data %>%
ggplot(aes(x = condition, y = analyte, fill = mean_conc)) +
geom_tile(color = "white", linewidth = 0.5) +
scale_fill_viridis_c(name = "Mean\nConcentration\n(μM)", option = "plasma") +
labs(title = "SCFA Concentration Heatmap Across All Conditions",
x = "Condition (Group_Carbohydrate_Time)",
y = "SCFA Analyte") +
pub_theme +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
axis.text.y = element_text(face = "italic"))
plot_heatmap

Interaction Effects
plot_interaction <- sample_data_long %>%
mutate(timepoint_hr = as.numeric(as.character(timepoint_hr))) %>%
group_by(group, carbohydrate_type, timepoint_hr, analyte) %>%
summarise(mean_conc = mean(concentration, na.rm = TRUE), .groups = "drop") %>%
ggplot(aes(x = timepoint_hr, y = mean_conc,
color = carbohydrate_type, linetype = group)) +
geom_line(linewidth = 1.2) +
geom_point(size = 3, aes(shape = group)) +
facet_wrap(~ analyte, scales = "free_y") +
scale_color_manual(values = carb_colors) +
scale_x_continuous(breaks = c(0, 48)) +
scale_linetype_manual(values = c("control" = "solid", "case" = "dashed")) +
scale_shape_manual(values = c("control" = 16, "case" = 17)) +
labs(title = "SCFA Concentrations: Group × Carbohydrate × Time Interaction",
x = "Time (hours)",
y = "Mean Concentration (μM)",
color = "Carbohydrate Type",
linetype = "Group",
shape = "Group") +
pub_theme +
theme(legend.position = "bottom")
plot_interaction

Subject-Level Individual Response Heatmap
# Subject-level heatmap showing individual responses
subject_heatmap_data <- sample_data_long %>%
mutate(
condition_time = paste0(carbohydrate_type, "_", timepoint_hr, "h"),
mean_conc = concentration # Already averaged technical replicates
) %>%
arrange(group, subject, analyte)
# Create ordered subject labels with control group first, then case group
subject_order <- subject_heatmap_data %>%
select(subject, group) %>%
distinct() %>%
arrange(desc(group), subject) %>% # control comes before case alphabetically
mutate(
subject_group = paste0(subject, " (", group, ")"),
# Add spacing indicator for plotting
group_spacing = ifelse(group == "control", "Control Group", "Case Group")
)
# Create subject ordering with gap between groups
control_subjects <- subject_order %>% filter(group == "control") %>% arrange(subject)
case_subjects <- subject_order %>% filter(group == "case") %>% arrange(subject)
# Create spacer rows for the gap between groups
n_analytes <- length(unique(subject_heatmap_data$analyte))
n_conditions <- length(unique(subject_heatmap_data$condition_time))
# Create empty spacer data for gap
spacer_data <- expand_grid(
analyte = unique(subject_heatmap_data$analyte),
condition_time = unique(subject_heatmap_data$condition_time),
subject_group = "--- GROUP SEPARATOR ---",
mean_conc = NA_real_
)
# Create ordered factor levels with gap
all_subject_levels <- c(
rev(control_subjects$subject_group), # Control at top (reversed for ggplot)
"--- GROUP SEPARATOR ---", # Gap
rev(case_subjects$subject_group) # Case at bottom (reversed for ggplot)
)
# Add the ordered subject labels to the data and include spacer
subject_heatmap_data <- subject_heatmap_data %>%
left_join(subject_order %>% select(subject, subject_group, group_spacing), by = "subject") %>%
bind_rows(spacer_data) %>%
mutate(
subject_group = factor(subject_group, levels = all_subject_levels)
)
plot_subject_heatmap <- subject_heatmap_data %>%
ggplot(aes(x = condition_time, y = subject_group, fill = mean_conc)) +
geom_tile(color = "white", linewidth = 0.2) +
facet_wrap(~ analyte, scales = "free", ncol = 2) +
scale_fill_viridis_c(name = "Concentration\n(μM)", option = "plasma",
trans = "sqrt", na.value = "white") +
labs(title = "Individual Subject SCFA Responses Across All Conditions",
subtitle = "Control group (top) and Case group (bottom) with visual separation",
x = "Condition × Time Point",
y = "Subject (Group)") +
pub_theme +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
axis.text.y = element_text(size = 7),
strip.text = element_text(size = 10, face = "bold"),
panel.spacing = unit(0.5, "lines")
)
plot_subject_heatmap

Individual Subject Trajectories by Carbohydrate Source
# Prepare data for trajectory plots
trajectory_data <- sample_data_long %>%
mutate(
timepoint_hr = as.numeric(as.character(timepoint_hr)),
mean_conc = concentration # Already averaged technical replicates
)
# Calculate group means for overlay
group_means_trajectories <- trajectory_data %>%
group_by(group, carbohydrate_type, timepoint_hr, analyte) %>%
summarise(
mean_conc = mean(mean_conc, na.rm = TRUE),
sem = sd(mean_conc, na.rm = TRUE) / sqrt(n()),
.groups = "drop"
)
# Create trajectory plots by carbohydrate source
plot_trajectories <- trajectory_data %>%
ggplot(aes(x = timepoint_hr, y = mean_conc)) +
# Individual subject lines (thin, semi-transparent)
geom_line(aes(group = subject, color = group),
alpha = 0.4, linewidth = 0.5) +
geom_point(aes(color = group), alpha = 0.5, size = 1) +
# Group mean lines (thick, prominent)
geom_line(data = group_means_trajectories,
aes(color = group),
linewidth = 2, alpha = 0.9) +
geom_point(data = group_means_trajectories,
aes(color = group),
size = 3, alpha = 0.9) +
# Error bars for group means
geom_errorbar(data = group_means_trajectories,
aes(ymin = mean_conc - sem, ymax = mean_conc + sem, color = group),
width = 1, linewidth = 1, alpha = 0.7) +
facet_grid(carbohydrate_type ~ analyte, scales = "free_y",
labeller = labeller(carbohydrate_type = function(x) {
str_replace_all(str_to_title(str_replace_all(x, "_", " ")),
c("No" = "No", "Rapid" = "Rapid", "Slow" = "Slow"))
})) +
scale_color_manual(values = group_colors) +
scale_x_continuous(breaks = c(0, 48)) +
labs(title = "Individual Subject SCFA Trajectories by Carbohydrate Source",
subtitle = "Thin lines: individual subjects, Thick lines: group means ± SEM",
x = "Time (hours)",
y = "Concentration (μM)",
color = "Group") +
pub_theme +
theme(
legend.position = "bottom",
strip.text = element_text(size = 10, face = "bold"),
panel.spacing = unit(0.3, "lines")
)
plot_trajectories

Case Group Only: Temporal Changes
# Case-only temporal changes visualization
case_only_plot_data <- sample_data_long %>%
filter(group == "case") %>%
mutate(timepoint_hr = as.numeric(as.character(timepoint_hr))) %>%
group_by(carbohydrate_type, timepoint_hr, analyte) %>%
summarise(
mean_conc = mean(concentration, na.rm = TRUE),
sem = sd(concentration, na.rm = TRUE) / sqrt(n()),
.groups = "drop"
)
plot_case_only <- case_only_plot_data %>%
ggplot(aes(x = timepoint_hr, y = mean_conc, color = carbohydrate_type)) +
geom_line(linewidth = 1.5) +
geom_point(size = 4) +
geom_errorbar(aes(ymin = mean_conc - sem, ymax = mean_conc + sem),
width = 1, linewidth = 1) +
facet_wrap(~ analyte, scales = "free_y") +
scale_color_manual(values = carb_colors) +
scale_x_continuous(breaks = c(0, 48)) +
labs(title = "Case Group Only: SCFA Temporal Changes by Carbohydrate Type",
subtitle = "Mean ± SEM concentrations from 0h to 48h",
x = "Time (hours)",
y = "Mean Concentration ± SEM (μM)",
color = "Carbohydrate Type") +
pub_theme +
theme(legend.position = "bottom")
plot_case_only

Case Group Individual Subject Trajectories
# Individual case subject trajectories with statistical overlay
case_individual_data <- sample_data_long %>%
filter(group == "case") %>%
mutate(
timepoint_hr = as.numeric(as.character(timepoint_hr)),
mean_conc = concentration # Already averaged technical replicates
)
plot_case_individual <- case_individual_data %>%
ggplot(aes(x = timepoint_hr, y = mean_conc)) +
# Individual subject lines
geom_line(aes(group = subject), alpha = 0.6, linewidth = 0.8, color = "#E74C3C") +
geom_point(alpha = 0.6, size = 2, color = "#E74C3C") +
# Group mean overlay
geom_line(data = case_only_plot_data,
aes(color = carbohydrate_type),
linewidth = 2, alpha = 0.9) +
geom_point(data = case_only_plot_data,
aes(color = carbohydrate_type),
size = 4, alpha = 0.9) +
facet_grid(carbohydrate_type ~ analyte, scales = "free_y",
labeller = labeller(carbohydrate_type = function(x) {
str_replace_all(str_to_title(str_replace_all(x, "_", " ")),
c("No" = "No", "Rapid" = "Rapid", "Slow" = "Slow"))
})) +
scale_color_manual(values = carb_colors) +
scale_x_continuous(breaks = c(0, 48)) +
labs(title = "Case Group Individual Subject SCFA Trajectories",
subtitle = "Thin lines: individual subjects, Thick lines: group means",
x = "Time (hours)",
y = "Concentration (μM)",
color = "Carbohydrate Type") +
pub_theme +
theme(
legend.position = "bottom",
strip.text = element_text(size = 10, face = "bold"),
panel.spacing = unit(0.3, "lines")
)
plot_case_individual

Save Plots
# Save all plots to files with appropriate dimensions
ggsave("plots/scfa_by_group.png", plot_by_group, width = 14, height = 10, dpi = 300)
ggsave("plots/scfa_by_carb.png", plot_by_carb, width = 14, height = 8, dpi = 300)
ggsave("plots/scfa_time_series.png", plot_time_series, width = 12, height = 8, dpi = 300)
ggsave("plots/scfa_heatmap.png", plot_heatmap, width = 14, height = 6, dpi = 300)
ggsave("plots/scfa_interaction.png", plot_interaction, width = 14, height = 10, dpi = 300)
ggsave("plots/scfa_subject_heatmap.png", plot_subject_heatmap, width = 16, height = 12, dpi = 300)
ggsave("plots/scfa_trajectories.png", plot_trajectories, width = 16, height = 12, dpi = 300)
ggsave("plots/scfa_case_only_temporal.png", plot_case_only, width = 12, height = 8, dpi = 300)
ggsave("plots/scfa_case_individual_trajectories.png", plot_case_individual, width = 16, height = 12, dpi = 300)
cat("All plots saved to plots/ directory with publication-quality formatting\n")
## All plots saved to plots/ directory with publication-quality formatting